% Conference paper. The venue is written out in full and `volume` is the
% proceedings volume number.
% NOTE(review): volume 12 and MIT Press are the proceedings details as the
% reviewer knows them; confirm against the published volume. `year` is kept
% at 1999 to stay in step with the citation key.
@inproceedings{sutton1999policy,
  author    = {Sutton, Richard S. and McAllester, David A. and Singh, Satinder P. and Mansour, Yishay},
  title     = {Policy gradient methods for reinforcement learning with function approximation},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {12},
  pages     = {1057--1063},
  year      = {1999},
  publisher = {MIT Press}
}

% Journal article. Journal names are typically not recased by styles, so the
% name is stored with its proper capitalisation.
@article{peters2008reinforcement,
  author    = {Peters, Jan and Schaal, Stefan},
  title     = {Reinforcement learning of motor skills with policy gradients},
  journal   = {Neural Networks},
  volume    = {21},
  number    = {4},
  pages     = {682--697},
  year      = {2008},
  publisher = {Elsevier}
}

% Journal article. Journal name stored with proper capitalisation; the
% hyphenated given name keeps a lowercase second element.
@article{amari1998natural,
  author    = {Amari, Shun-ichi},
  title     = {Natural gradient works efficiently in learning},
  journal   = {Neural Computation},
  volume    = {10},
  number    = {2},
  pages     = {251--276},
  year      = {1998},
  publisher = {MIT Press}
}

% Textbook. The citation key is kept unchanged so existing \cite commands
% still resolve.
% NOTE(review): title and publisher address follow the book's title page as
% the reviewer knows it; confirm against a physical or publisher copy.
@book{sutton1998introduction,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Reinforcement Learning: An Introduction},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998}
}

% Journal article. Journal name stored with proper capitalisation.
@article{sutton1988learning,
  author    = {Sutton, Richard S.},
  title     = {Learning to predict by the methods of temporal differences},
  journal   = {Machine Learning},
  volume    = {3},
  number    = {1},
  pages     = {9--44},
  year      = {1988},
  publisher = {Springer}
}

% Web page. There is no author, so `key` supplies the sort/label fallback
% that classic BibTeX styles ask for. The event name is brace-protected so
% sentence-casing styles do not lower-case it.
% NOTE(review): no access date is recorded; add one if the style prints it.
@electronic{rlcompetition2014website,
    key   = {RL Competition 2014},
    title = {{RL Competition} 2014},
    url   = {https://sites.google.com/site/rlcompetition2014/}
}

% Web page for one competition domain. There is no author, so `key` supplies
% the sort/label fallback. Only the proper names in the title are
% brace-protected, which renders the same as protecting the whole title.
% NOTE(review): no access date is recorded; add one if the style prints it.
@electronic{rlcompetition2014Polyathlon,
    key   = {RL Competition 2014 Polyathlon},
    title = {{RL Competition} 2014, {Polyathlon}},
    url   = {https://sites.google.com/site/rlcompetition2014/domains/polyathlon}
}

% Conference paper. `volume` and `number` are deliberately absent: classic
% styles reject having both on an inproceedings entry, and proceedings of
% this kind are identified by their title instead.
% NOTE(review): the end page (1570) and the exact proceedings title were
% supplied by the reviewer; confirm against the proceedings index.
@inproceedings{wingate2011bayesian,
    author    = {Wingate, David and Goodman, Noah D. and Roy, Daniel M. and Kaelbling, Leslie P. and Tenenbaum, Joshua B.},
    title     = {{Bayesian} policy search with policy priors},
    booktitle = {Proceedings of the Twenty-Second International Joint Conference on Artificial Intelligence ({IJCAI})},
    pages     = {1565--1570},
    year      = {2011}
}

% Journal article. The platform name in the title is a proper name, so it is
% capitalised and brace-protected against sentence-casing styles.
% NOTE(review): `number` is carried over from the original export; confirm
% that this journal actually uses issue numbers.
@article{bellemare2013arcade,
    author    = {Bellemare, Marc G. and Naddaf, Yavar and Veness, Joel and Bowling, Michael},
    title     = {The {Arcade Learning Environment}: an evaluation platform for general agents},
    journal   = {Journal of Artificial Intelligence Research},
    volume    = {47},
    number    = {1},
    pages     = {253--279},
    year      = {2013},
    publisher = {AI Access Foundation}
}

% Journal article. The journal name is given in full and the page range uses
% a double hyphen. Only `year` carries the publication date; `keywords` and
% `timestamp` are export metadata that styles ignore.
% NOTE(review): the full journal name was expanded from "Inf. Sci." by the
% reviewer; confirm.
@article{wang:2007,
  author    = {Wang, Xuesong and Cheng, Yuhu and Yi, Jian-Qiang},
  title     = {A fuzzy Actor-Critic reinforcement learning network},
  journal   = {Information Sciences},
  volume    = {177},
  number    = {18},
  pages     = {3764--3781},
  year      = {2007},
  keywords  = {dblp},
  timestamp = {2007-09-28T00:00:00.000+0200}
}

% Technical report. `institution` names the issuing body rather than the
% archive that hosts the scan; `type` and `number` identify the report.
% NOTE(review): institution, address and memorandum number (4148) were
% supplied by the reviewer from memory; confirm against the report cover.
@techreport{broomhead:1988,
  author      = {Broomhead, David S. and Lowe, David},
  title       = {Radial basis functions, multi-variable functional interpolation and adaptive networks},
  institution = {Royal Signals and Radar Establishment},
  address     = {Malvern, UK},
  type        = {RSRE Memorandum},
  number      = {4148},
  year        = {1988}
}

% Conference paper; the booktitle also carries the venue location.
@inproceedings{shorten:1994,
  author    = {Shorten, Robert and Murray-Smith, Roderick},
  title     = {On normalising radial basis function networks},
  booktitle = {Proceedings of the Fourth Irish Neural Network Conference, University College Dublin, Ireland},
  year      = {1994},
  pages     = {213--217}
}

% Unpublished lecture notes. The author is in "Last, First" form like the
% rest of the file; `note` is required for this entry type.
% NOTE(review): no course name or URL is recorded; add them if known.
@unpublished{silvernotes,
    author = {Silver, David},
    title  = {Lecture 7: policy gradient},
    note   = {Lecture notes},
    year   = {2015}
}

% Chapter in an edited volume; `booktitle` names the containing collection.
@incollection{geist2010revisiting,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Revisiting natural actor-critics with value function approximation},
  booktitle = {Modeling Decisions for Artificial Intelligence},
  publisher = {Springer},
  year      = {2010},
  pages     = {207--218}
}